msg_tool\scripts\bgi\archive/
dsc.rs

1//! Buriko General Interpreter/Ethornell compressed file in archive
2use crate::ext::io::*;
3use crate::ext::vec::*;
4use crate::scripts::base::*;
5use crate::types::*;
6use crate::utils::bit_stream::*;
7use anyhow::Result;
8use rand::Rng;
9use std::collections::BinaryHeap;
10use std::io::{Seek, Write};
11
12#[derive(Debug)]
13struct HuffmanCode {
14    code: u16,
15    depth: u8,
16}
17
18impl std::cmp::PartialEq for HuffmanCode {
19    fn eq(&self, other: &Self) -> bool {
20        self.code == other.code && self.depth == other.depth
21    }
22}
23
24impl std::cmp::Eq for HuffmanCode {}
25
26impl std::cmp::PartialOrd for HuffmanCode {
27    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
28        let cmp = self.depth.cmp(&other.depth);
29        if cmp == std::cmp::Ordering::Equal {
30            Some(self.code.cmp(&other.code))
31        } else {
32            Some(cmp)
33        }
34    }
35}
36
37impl std::cmp::Ord for HuffmanCode {
38    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
39        let cmp = self.depth.cmp(&other.depth);
40        if cmp == std::cmp::Ordering::Equal {
41            self.code.cmp(&other.code)
42        } else {
43            cmp
44        }
45    }
46}
47
48#[derive(Clone, Debug)]
49struct HuffmanNode {
50    is_parent: bool,
51    code: Option<u16>,
52    left_index: usize,
53    right_index: usize,
54}
55
56/// Decoder for Buriko General Interpreter/Ethornell compressed files (DSC format).
57pub struct DscDecoder<'a> {
58    stream: MsbBitStream<MemReaderRef<'a>>,
59    key: u32,
60    magic: u32,
61    output_size: u32,
62    dec_count: u32,
63}
64
65impl<'a> DscDecoder<'a> {
66    /// Creates a new DscDecoder from the given data slice.
67    pub fn new(data: &'a [u8]) -> Result<Self> {
68        let mut reader = MemReaderRef::new(data);
69        let magic = (reader.read_u16()? as u32) << 16;
70        reader.pos = 0x10;
71        let key = reader.read_u32()?;
72        let output_size = reader.read_u32()?;
73        let dec_count = reader.read_u32()?;
74        let stream = MsbBitStream::new(reader);
75        Ok(DscDecoder {
76            stream,
77            key,
78            magic,
79            output_size,
80            dec_count,
81        })
82    }
83
84    /// Unpacks the DSC file and returns the decompressed data.
85    pub fn unpack(mut self) -> Result<Vec<u8>> {
86        self.stream.m_input.pos = 0x20;
87        let mut codes = Vec::new();
88        for i in 0..512 {
89            let src = self.stream.m_input.read_u8()?;
90            let depth = src.overflowing_sub(self.update_key()).0;
91            if depth > 0 {
92                codes.push(HuffmanCode { code: i, depth })
93            }
94        }
95        codes.sort();
96        let root = Self::create_huffman_tree(codes);
97        self.huffman_decompress(root)
98    }
99
100    fn create_huffman_tree(codes: Vec<HuffmanCode>) -> Vec<HuffmanNode> {
101        let mut trees = Vec::with_capacity(1024);
102        trees.resize(
103            1024,
104            HuffmanNode {
105                is_parent: false,
106                code: None,
107                left_index: 0,
108                right_index: 0,
109            },
110        );
111        let mut left_index = vec![0usize; 512];
112        let mut right_index = vec![0usize; 512];
113        let mut next_node_index = 1usize;
114        let mut depth_nodes = 1usize;
115        let mut depth = 0u8;
116        let mut left_child = true;
117        let mut n = 0;
118        while n < codes.len() {
119            let huffman_node_index = left_child;
120            left_child = !left_child;
121            let mut depth_existed_nodes = 0;
122            while n < codes.len() && codes[n].depth == depth {
123                let index = if huffman_node_index {
124                    left_index[depth_existed_nodes]
125                } else {
126                    right_index[depth_existed_nodes]
127                };
128                trees[index].code = Some(codes[n].code);
129                n += 1;
130                depth_existed_nodes += 1;
131            }
132            let depth_nodes_to_create = depth_nodes - depth_existed_nodes;
133            for i in 0..depth_nodes_to_create {
134                let index = if huffman_node_index {
135                    left_index[depth_existed_nodes + i]
136                } else {
137                    right_index[depth_existed_nodes + i]
138                };
139                let node = &mut trees[index];
140                node.is_parent = true;
141                if left_child {
142                    left_index[i * 2] = next_node_index;
143                    node.left_index = next_node_index;
144                    next_node_index += 1;
145                    left_index[i * 2 + 1] = next_node_index;
146                    node.right_index = next_node_index;
147                    next_node_index += 1;
148                } else {
149                    right_index[i * 2] = next_node_index;
150                    node.left_index = next_node_index;
151                    next_node_index += 1;
152                    right_index[i * 2 + 1] = next_node_index;
153                    node.right_index = next_node_index;
154                    next_node_index += 1;
155                }
156            }
157            depth += 1;
158            depth_nodes = depth_nodes_to_create * 2;
159        }
160        trees
161    }
162
163    fn huffman_decompress(&mut self, nodes: Vec<HuffmanNode>) -> Result<Vec<u8>> {
164        let output_size = self.output_size as usize;
165        let mut output = Vec::with_capacity(output_size);
166        let mut dst = 0;
167        output.resize(output_size, 0);
168        for _ in 0..self.dec_count {
169            let mut current_node = &nodes[0];
170            loop {
171                let bit = self.stream.get_next_bit()?;
172                if !bit {
173                    current_node = &nodes[current_node.left_index]
174                } else {
175                    current_node = &nodes[current_node.right_index]
176                }
177                if !current_node.is_parent {
178                    break;
179                }
180            }
181            let code = *current_node.code.as_ref().unwrap();
182            if code >= 256 {
183                let mut offset = self.stream.get_bits(12)?;
184                let count = ((code & 0xFF) + 2) as usize;
185                offset += 2;
186                output.copy_overlapped(dst - offset as usize, dst, count);
187                dst += count;
188            } else {
189                output[dst] = code as u8;
190                dst += 1;
191            }
192        }
193        if dst != output_size {
194            eprintln!(
195                "Warning: Output size mismatch, expected {}, got {}",
196                self.output_size, dst
197            );
198            crate::COUNTER.inc_warning();
199        }
200        Ok(output)
201    }
202
203    fn update_key(&mut self) -> u8 {
204        let v0 = 20021 * (self.key & 0xffff);
205        let mut v1 = self.magic | (self.key >> 16);
206        v1 = v1
207            .overflowing_mul(20021)
208            .0
209            .overflowing_add(self.key.overflowing_mul(346).0)
210            .0;
211        v1 = (v1 + (v0 >> 16)) & 0xffff;
212        self.key = (v1 << 16) + (v0 & 0xffff) + 1;
213        v1 as u8
214    }
215}
216
217#[derive(Debug, Clone, Copy)]
218enum LzssOp {
219    Literal(u8),
220    Match { len: u16, offset: u16 },
221}
222
223#[derive(Debug)]
224struct FreqNode {
225    freq: u32,
226    symbol: Option<u16>,
227    left: Option<Box<FreqNode>>,
228    right: Option<Box<FreqNode>>,
229}
230impl PartialEq for FreqNode {
231    fn eq(&self, other: &Self) -> bool {
232        self.freq == other.freq
233    }
234}
235impl Eq for FreqNode {}
236impl PartialOrd for FreqNode {
237    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
238        Some(self.cmp(other))
239    }
240}
241impl Ord for FreqNode {
242    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
243        other.freq.cmp(&self.freq)
244    }
245}
246
247fn calculate_huffman_depths(freqs: &[u32]) -> Vec<u8> {
248    const MAX_DEPTH: u8 = 9;
249
250    // 收集所有非零频率的符号
251    let mut symbols_with_freq: Vec<(u16, u32)> = freqs
252        .iter()
253        .enumerate()
254        .filter_map(|(symbol, &freq)| {
255            if freq > 0 {
256                Some((symbol as u16, freq))
257            } else {
258                None
259            }
260        })
261        .collect();
262
263    let mut depths = vec![0u8; 512];
264
265    if symbols_with_freq.is_empty() {
266        return depths;
267    }
268
269    if symbols_with_freq.len() == 1 {
270        depths[symbols_with_freq[0].0 as usize] = 1;
271        return depths;
272    }
273
274    // 使用受限Huffman算法
275    loop {
276        let current_depths = build_huffman_tree(&symbols_with_freq);
277        let max_depth = current_depths.iter().max().copied().unwrap_or(0);
278
279        if max_depth <= MAX_DEPTH {
280            // 将深度映射回原始数组
281            for &(symbol, _) in &symbols_with_freq {
282                let symbol_index = symbols_with_freq
283                    .iter()
284                    .position(|(s, _)| *s == symbol)
285                    .unwrap();
286                depths[symbol as usize] = current_depths[symbol_index];
287            }
288            break;
289        }
290
291        // 如果深度超限,调整频率
292        adjust_frequencies_for_depth_limit(&mut symbols_with_freq);
293    }
294
295    depths
296}
297
298fn build_huffman_tree(symbols_with_freq: &[(u16, u32)]) -> Vec<u8> {
299    let mut heap = BinaryHeap::new();
300
301    // 添加所有叶子节点
302    for &(symbol, freq) in symbols_with_freq {
303        heap.push(FreqNode {
304            freq,
305            symbol: Some(symbol),
306            left: None,
307            right: None,
308        });
309    }
310
311    // 构建Huffman树
312    while heap.len() > 1 {
313        let node1 = heap.pop().unwrap();
314        let node2 = heap.pop().unwrap();
315        let new_node = FreqNode {
316            freq: node1.freq + node2.freq,
317            symbol: None,
318            left: Some(Box::new(node1)),
319            right: Some(Box::new(node2)),
320        };
321        heap.push(new_node);
322    }
323
324    // 计算深度
325    let mut depths = vec![0u8; symbols_with_freq.len()];
326    if let Some(root) = heap.pop() {
327        calculate_depths(&root, 0, symbols_with_freq, &mut depths);
328    }
329
330    depths
331}
332
333fn calculate_depths(
334    node: &FreqNode,
335    depth: u8,
336    symbols_with_freq: &[(u16, u32)],
337    depths: &mut [u8],
338) {
339    if let Some(symbol) = node.symbol {
340        let symbol_index = symbols_with_freq
341            .iter()
342            .position(|(s, _)| *s == symbol)
343            .unwrap();
344        depths[symbol_index] = if depth == 0 { 1 } else { depth };
345    } else {
346        if let Some(ref left) = node.left {
347            calculate_depths(left, depth + 1, symbols_with_freq, depths);
348        }
349        if let Some(ref right) = node.right {
350            calculate_depths(right, depth + 1, symbols_with_freq, depths);
351        }
352    }
353}
354
355fn adjust_frequencies_for_depth_limit(symbols_with_freq: &mut [(u16, u32)]) {
356    // 按频率排序
357    symbols_with_freq.sort_by(|a, b| a.1.cmp(&b.1));
358
359    // 使用Package-Merge算法的简化版本
360    // 这里使用一个启发式方法:增加低频符号的频率
361    let min_freq = symbols_with_freq[0].1;
362    let adjustment = (min_freq as f64 * 0.1).max(1.0) as u32;
363
364    // 找到频率最低的几个符号并调整它们的频率
365    let num_to_adjust = (symbols_with_freq.len() / 4).max(1);
366    for i in 0..num_to_adjust.min(symbols_with_freq.len()) {
367        symbols_with_freq[i].1 += adjustment;
368    }
369}
370
371fn generate_canonical_codes(depths: &[u8]) -> Vec<Option<(u16, u8)>> {
372    let mut codes_with_depths = vec![];
373    for (symbol, &depth) in depths.iter().enumerate() {
374        if depth > 0 {
375            codes_with_depths.push((symbol as u16, depth));
376        }
377    }
378    codes_with_depths.sort_by(|a, b| {
379        let depth_cmp = a.1.cmp(&b.1);
380        if depth_cmp == std::cmp::Ordering::Equal {
381            a.0.cmp(&b.0)
382        } else {
383            depth_cmp
384        }
385    });
386
387    let mut huffman_codes = vec![None; 512];
388    let mut current_code = 0u16;
389    let mut last_depth = 0u8;
390
391    for &(symbol, depth) in &codes_with_depths {
392        if last_depth != 0 {
393            current_code <<= depth - last_depth;
394        }
395        huffman_codes[symbol as usize] = Some((current_code, depth));
396        current_code += 1;
397        last_depth = depth;
398    }
399
400    huffman_codes
401}
402
403/// Encoder for Buriko General Interpreter/Ethornell compressed files (DSC format).
404pub struct DscEncoder<'a, T: Write + Seek> {
405    stream: MsbBitWriter<'a, T>,
406    magic: u32,
407    key: u32,
408    dec_count: u32,
409    min_len: usize,
410}
411
412impl<'a, T: Write + Seek> DscEncoder<'a, T> {
413    /// Creates a new DscEncoder with the given writer and minimum length for LZSS compression.
414    pub fn new(writer: &'a mut T, min_len: usize) -> Self {
415        let stream = MsbBitWriter::new(writer);
416        DscEncoder {
417            stream,
418            magic: 0x5344 << 16, // "DS"
419            key: rand::rng().random(),
420            dec_count: 0,
421            min_len,
422        }
423    }
424
425    /// Packs the given data into the DSC format using LZSS compression.
426    pub fn pack(mut self, data: &[u8]) -> Result<()> {
427        // LZSS compression
428        let mut ops = vec![];
429        let mut pos = 0;
430
431        const MAX_LEN: usize = 257;
432        const WINDOW_SIZE: usize = 4097;
433
434        let mut head: Vec<i32> = vec![-1; 1 << 16];
435        let mut prev: Vec<i32> = vec![-1; data.len()];
436
437        while pos < data.len() {
438            let max_len = (data.len() - pos).min(MAX_LEN);
439            let mut best_len = 0;
440            let mut best_offset = 0;
441
442            if max_len >= self.min_len {
443                let limit = pos.saturating_sub(WINDOW_SIZE);
444                let key = (data[pos] as u16) << 8 | data[pos + 1] as u16;
445                let mut match_pos_i32 = head[key as usize];
446
447                while match_pos_i32 != -1 {
448                    let match_pos = match_pos_i32 as usize;
449                    if match_pos < limit {
450                        break;
451                    }
452
453                    if data.get(match_pos + best_len) == data.get(pos + best_len) {
454                        let mut current_len = 0;
455                        for i in 0..max_len {
456                            if data.get(pos + i) != data.get(match_pos + i) {
457                                break;
458                            }
459                            current_len += 1;
460                        }
461
462                        if current_len > best_len {
463                            best_len = current_len;
464                            best_offset = pos - match_pos;
465                            if best_len >= max_len {
466                                break;
467                            }
468                        }
469                    }
470                    match_pos_i32 = prev[match_pos];
471                }
472            }
473
474            if best_len >= self.min_len && best_offset >= 2 {
475                ops.push(LzssOp::Match {
476                    len: best_len as u16,
477                    offset: best_offset as u16,
478                });
479                for i in 0..best_len {
480                    if pos + i + 1 < data.len() {
481                        let key = (data[pos + i] as u16) << 8 | data[pos + i + 1] as u16;
482                        let current_pos = pos + i;
483                        prev[current_pos] = head[key as usize];
484                        head[key as usize] = current_pos as i32;
485                    }
486                }
487                pos += best_len;
488            } else {
489                ops.push(LzssOp::Literal(data[pos]));
490                if pos + 1 < data.len() {
491                    let key = (data[pos] as u16) << 8 | data[pos + 1] as u16;
492                    prev[pos] = head[key as usize];
493                    head[key as usize] = pos as i32;
494                }
495                pos += 1;
496            }
497        }
498
499        let symbols: Vec<u16> = ops
500            .iter()
501            .map(|op| match op {
502                LzssOp::Literal(byte) => *byte as u16,
503                LzssOp::Match { len, .. } => 256 + (len - 2),
504            })
505            .collect();
506        self.dec_count = symbols.len() as u32;
507
508        let mut freqs = vec![0u32; 512];
509        for &s in &symbols {
510            freqs[s as usize] += 1;
511        }
512
513        let depths = calculate_huffman_depths(&freqs);
514        let huffman_codes = generate_canonical_codes(&depths);
515
516        self.stream.writer.write_all(b"DSC FORMAT 1.00\0")?;
517        self.stream.writer.seek(std::io::SeekFrom::Start(0x10))?;
518        self.stream.writer.write_u32(self.key)?;
519        self.stream.writer.write_u32(data.len() as u32)?;
520        self.stream.writer.write_u32(self.dec_count)?;
521        self.stream.writer.seek(std::io::SeekFrom::Start(0x20))?;
522
523        for depth in depths.iter() {
524            let key = self.update_key();
525            self.stream.writer.write_u8(depth.overflowing_add(key).0)?;
526        }
527
528        for op in &ops {
529            match op {
530                LzssOp::Literal(byte) => {
531                    let symbol = *byte as u16;
532                    let (code, len) = huffman_codes[symbol as usize].unwrap();
533                    self.stream.put_bits(code as u32, len)?;
534                }
535                LzssOp::Match { len, offset } => {
536                    let symbol = 256 + (len - 2);
537                    let (code, huff_len) = huffman_codes[symbol as usize].unwrap();
538                    self.stream.put_bits(code as u32, huff_len)?;
539                    self.stream.put_bits((*offset - 2) as u32, 12)?;
540                }
541            }
542        }
543        self.stream.flush()?;
544        Ok(())
545    }
546
547    fn update_key(&mut self) -> u8 {
548        let v0 = 20021 * (self.key & 0xffff);
549        let mut v1 = self.magic | (self.key >> 16);
550        v1 = v1
551            .overflowing_mul(20021)
552            .0
553            .overflowing_add(self.key.overflowing_mul(346).0)
554            .0;
555        v1 = (v1 + (v0 >> 16)) & 0xffff;
556        self.key = (v1 << 16) + (v0 & 0xffff) + 1;
557        v1 as u8
558    }
559}
560
561#[derive(Debug)]
562/// Builder for DSC scripts.
563pub struct DscBuilder {}
564
565impl DscBuilder {
566    /// Creates a new instance of `DscBuilder`.
567    pub fn new() -> Self {
568        DscBuilder {}
569    }
570}
571
572impl ScriptBuilder for DscBuilder {
573    fn default_encoding(&self) -> Encoding {
574        Encoding::Cp932
575    }
576
577    fn default_archive_encoding(&self) -> Option<Encoding> {
578        Some(Encoding::Cp932)
579    }
580
581    fn build_script(
582        &self,
583        buf: Vec<u8>,
584        _filename: &str,
585        _encoding: Encoding,
586        _archive_encoding: Encoding,
587        config: &ExtraConfig,
588        _archive: Option<&Box<dyn Script>>,
589    ) -> Result<Box<dyn Script>> {
590        Ok(Box::new(Dsc::new(buf, config)?))
591    }
592
593    fn extensions(&self) -> &'static [&'static str] {
594        &[]
595    }
596
597    fn script_type(&self) -> &'static ScriptType {
598        &ScriptType::BGIDsc
599    }
600
601    fn is_this_format(&self, _filename: &str, buf: &[u8], buf_len: usize) -> Option<u8> {
602        if buf_len >= 16 && buf.starts_with(b"DSC FORMAT 1.00\0") {
603            return Some(255);
604        }
605        None
606    }
607
608    fn can_create_file(&self) -> bool {
609        true
610    }
611
612    fn create_file<'a>(
613        &'a self,
614        filename: &'a str,
615        mut writer: Box<dyn WriteSeek + 'a>,
616        _encoding: Encoding,
617        _file_encoding: Encoding,
618        config: &ExtraConfig,
619    ) -> Result<()> {
620        let encoder = DscEncoder::new(&mut writer, config.bgi_compress_min_len);
621        let data = crate::utils::files::read_file(filename)?;
622        encoder.pack(&data)?;
623        Ok(())
624    }
625}
626
627#[derive(Debug)]
628/// DSC script
629pub struct Dsc {
630    data: Vec<u8>,
631    min_len: usize,
632}
633
634impl Dsc {
635    /// Creates a new Dsc script
636    ///
637    /// * `buf` - The buffer containing the DSC data.
638    /// * `config` - Extra configuration options.
639    pub fn new(buf: Vec<u8>, config: &ExtraConfig) -> Result<Self> {
640        if buf.len() < 16 || !buf.starts_with(b"DSC FORMAT 1.00\0") {
641            return Err(anyhow::anyhow!("Invalid DSC format"));
642        }
643        let decoder = DscDecoder::new(&buf)?;
644        let data = decoder.unpack()?;
645        Ok(Dsc {
646            data,
647            min_len: config.bgi_compress_min_len,
648        })
649    }
650}
651
652impl Script for Dsc {
653    fn default_output_script_type(&self) -> OutputScriptType {
654        OutputScriptType::Custom
655    }
656
657    fn is_output_supported(&self, output: OutputScriptType) -> bool {
658        matches!(output, OutputScriptType::Custom)
659    }
660
661    fn default_format_type(&self) -> FormatOptions {
662        FormatOptions::None
663    }
664
665    fn custom_output_extension(&self) -> &'static str {
666        ""
667    }
668
669    fn custom_export(&self, filename: &std::path::Path, _encoding: Encoding) -> Result<()> {
670        let mut f = std::fs::File::create(filename)?;
671        f.write_all(&self.data)?;
672        Ok(())
673    }
674
675    fn custom_import<'a>(
676        &'a self,
677        custom_filename: &'a str,
678        mut file: Box<dyn WriteSeek + 'a>,
679        _encoding: Encoding,
680        _output_encoding: Encoding,
681    ) -> Result<()> {
682        let encoder = DscEncoder::new(&mut file, self.min_len);
683        let data = crate::utils::files::read_file(custom_filename)?;
684        encoder.pack(&data)?;
685        Ok(())
686    }
687}
688
689/// Parses the minimum length for LZSS compression from a string.
690pub fn parse_min_length(len: &str) -> Result<usize, String> {
691    clap_num::number_range(len, 2, 256)
692}